# -*- coding:utf8 -*-
# !/usr/bin/env python

'''
#全国企业信用信息公示系统（北京）
#维护黄羽
#公司基本情况
'''

from utils import kill_captcha
from scpy.request_util import *
from scpy.logger import get_logger

import random
import re
import sys
import time
import urllib

import requests

reload(sys)
sys.setdefaultencoding('utf8')

logger = get_logger(__file__)


def get_base_info(companyName):
    """
    Download the base-information pages for one company from
    qyxy.baic.gov.cn (Beijing enterprise credit information system).

    Flow: fetch the index page to obtain a session cookie and the captcha
    parameters, download and crack the captcha, submit it for validation,
    search for the company, then fetch each detail page (base info,
    changes, shareholders, liquidation, key persons, spot checks,
    abnormal operations) and return their HTML concatenated.

    :param companyName: company name or registration number.
    :return: None  -> the company does not exist;
             ''    -> transient failure (bad captcha / remote "访问异常"),
                      caller should retry (see base_info_run);
             str   -> concatenated HTML of all detail pages.
    :raises Exception: cookie missing, page layout changed, or the
             captcha download / crack service failed hard.
    """
    index_url = 'http://qyxy.baic.gov.cn/beijing'
    index_header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'qyxy.baic.gov.cn',
        'Referer': 'http://gsxt.saic.gov.cn/',
        'Upgrade-Insecure-Requests': '1',
    }

    index_req = requests.session()
    index_req.headers = index_header
    index_res = index_req.get(index_url)

    # The site issues the session cookie on the index response; every later
    # request must carry it verbatim (minus the path/comma noise).
    index_set_cookie_fist = index_res.headers.get('set-cookie', '')
    if index_set_cookie_fist:
        index_set_cookie = index_set_cookie_fist.replace("path=/", "").replace(",", "").replace(" ", "")
    else:
        raise Exception("cookie 获取失败！")

    logger.info("当前cookie为：%s", index_set_cookie)

    index_html = index_res.content
    # The index page embeds which captcha servlet is in use plus the two
    # tokens (timestamp + ticket) that must be echoed back with the answer.
    check_code_servlet_name = re.findall(r'var checkCodeServletName = "(.*?)";', index_html)
    logger.info("当前验证码服务器类型为：%s", check_code_servlet_name)

    current_time_millis = re.findall(r'id="currentTimeMillis" value="(\d+?)"/>', index_html)
    credit_ticket = re.findall(r'id="credit_ticket" value="(.*?)"/>', index_html)
    if current_time_millis and credit_ticket:
        current_time_millis = current_time_millis[0]
        credit_ticket = credit_ticket[0]
    else:
        logger.error("网页发生变化！")
        raise Exception("网页发生变化！")

    # NOTE: membership test is an exact-element match against the findall
    # list; it only hits when the captured name is exactly the servlet name.
    if 'CheckCodeCaptcha' in check_code_servlet_name:
        # character-recognition captcha -> crack server code 'bj1'
        crack_server_code = 'bj1'
        img_url = 'http://qyxy.baic.gov.cn/CheckCodeCaptcha?currentTimeMillis=' + current_time_millis
    elif 'CheckCodeYunSuan' in check_code_servlet_name:
        # arithmetic captcha -> crack server code 'bj'
        crack_server_code = 'bj'
        img_url = 'http://qyxy.baic.gov.cn/CheckCodeYunSuan?currentTimeMillis=' + current_time_millis
    else:
        logger.error("验证码网页发生变化,重新破解！")
        return ''

    img_headers = {
        'Accept': 'image/webp,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': index_set_cookie,
        'Host': 'qyxy.baic.gov.cn',
        'Referer': 'http://qyxy.baic.gov.cn/beijing',
    }
    img_req = RequestUtil()
    img_req.set_hreaders(img_headers)
    try:
        captcha = img_req.make_request(img_url, timeout=200).content
    except Exception as e:
        logger.error("从网站下载验证码失败！重复下载！")
        logger.error(e)
        raise Exception("download captcha error")
    if not captcha:
        logger.error("从网站下载验证码为空！重复下载！")
        return ''

    try:
        res_code = kill_captcha(captcha, crack_server_code, 'jpeg')
    except Exception as e:
        logger.error("破解验证码的服务出现异常")
        logger.error(e)
        raise e
    # BUGFIX: was `not res_code and len(res_code) > 100`, which can never be
    # true (an empty string has length 0) — the sanity check was dead code.
    # An empty answer or an absurdly long one (e.g. an HTML error page from
    # the crack service) both mean the crack failed; retry.
    if not res_code or len(res_code) > 100:
        logger.info('验证码为:%s' % res_code)
        logger.error("破解验证码的服务出现异常,可能是下载的验证码错误，也可能破解服务出现异常！")
        return ''  # empty string -> caller retries the whole crack cycle

    check_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!checkCode.dhtml'
    literal_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!findLiteralWord.dhtml'
    com_list_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!getBjQyList.dhtml'
    check_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        # NOTE(review): hard-coded Content-Length mimics a browser capture;
        # the real length varies with the keyword — presumably the transport
        # layer overrides it. Verify before removing.
        'Content-Length': '135',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': index_set_cookie,
        'Host': 'qyxy.baic.gov.cn',
        'Origin': 'http://qyxy.baic.gov.cn',
        'Referer': 'http://qyxy.baic.gov.cn/beijing',
        'Upgrade-Insecure-Requests': '1',
    }
    check_data = {
        'currentTimeMillis': current_time_millis,
        'credit_ticket': credit_ticket,
        'checkcode': res_code,
        'keyword': companyName,
    }
    check_req = img_req
    check_req.set_hreaders(check_headers)
    check_res = check_req.make_request(check_url, data=check_data, method='post', timeout=100).content
    logger.info("网站返回：%s", check_res)
    if check_res == 'success':
        logger.info("验证码正确！网站返回：%s", check_res)
    elif check_res == 'fail' or re.findall('访问异常', check_res):
        logger.error("验证码破解错误或访问异常，延时，重复破解！")
        time.sleep(random.random())  # small jitter before the retry
        return ''
    else:
        logger.error("网页发生变化！")
        raise Exception("网页发生变化！")

    # Mirrors the browser's request sequence; the response itself is unused.
    literal_res = check_req.make_request(literal_url, data=check_data, method='post', timeout=20)
    logger.info("%s", literal_res)

    com_list_html = check_req.make_request(com_list_url, data=check_data, method='post', timeout=20).content

    # openEntInfo('<name>', '<entId>', '<entNo>', '<ticket>', ...) arguments
    # identify the company on every detail-page URL below.
    com_list = re.findall(r'onclick="openEntInfo\((.*?)\);', com_list_html)
    if com_list:
        logger.info("搜索的公司存在！")
        com_temp = com_list[0].replace("'", "").replace(" ", "").split(',')
    else:
        logger.info("搜索的公司不存在！")
        return None

    # base information
    base_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!openEntInfo.dhtml?'
    # shareholders
    share_holder_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!tzrFrame.dhtml?'
    # change records
    alter_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!biangengFrame.dhtml?'
    # liquidation (second page)
    liquidation_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!qsxxFrame.dhtml?'
    # key personnel
    person_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!zyryFrame.dhtml?'
    # spot checks / inspections
    checkMessage_url = 'http://qyxy.baic.gov.cn/gsgs/gsxzcfAction!list_ccjcxx.dhtml?'
    # abnormal operations
    abnormal_operation_url = 'http://qyxy.baic.gov.cn/gsgs/gsxzcfAction!list_jyycxx.dhtml?'

    asic_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': index_set_cookie,
        'Host': 'qyxy.baic.gov.cn',
        'Referer': 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!getBjQyList.dhtml',
        'Upgrade-Insecure-Requests': '1',
    }
    com_base_req_time_stamp = int(round(time.time() * 1000))
    asic_req = check_req
    asic_req.set_hreaders(asic_headers)
    com_base_data = {
        'entId': com_temp[1],
        'credit_ticket': com_temp[3],
        'entNo': com_temp[2],
        'timeStamp': com_base_req_time_stamp,
    }

    base_res = asic_req.make_request(base_url, data=com_base_data, method='get').content
    if '访问异常' in base_res:
        logger.error("在获取基本信息时,网站访问异常,重复访问！")
        return ''
    asic_res_html = base_res

    # Subsequent detail pages expect the base-info URL as Referer.
    util_referer = base_url + urllib.urlencode(com_base_data)
    util_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': index_set_cookie,
        'Host': 'qyxy.baic.gov.cn',
        'Referer': util_referer,
        'Upgrade-Insecure-Requests': '1',
    }

    alter_data = {
        'ent_id': com_temp[1],
        'clear': 'true',
        'timeStamp': int(round(time.time() * 1000)),
    }
    asic_req.set_hreaders(util_headers)
    alter_res = asic_req.make_request(alter_url, data=alter_data, method='get').content
    if '访问异常' in alter_res:
        logger.error("在获取变更信息时,网站访问异常,重复访问！")
        return ''
    asic_res_html += alter_res

    share_holder_data = {
        'ent_id': com_temp[1],
        'entName': '',
        'clear': 'true',
        'timeStamp': int(round(time.time() * 1000)),
    }
    share_holder_res = asic_req.make_request(share_holder_url, data=share_holder_data, method='get').content
    if '访问异常' in share_holder_res:
        logger.error("在获取股东信息时,网站访问异常,重复访问！")
        return ''
    asic_res_html += share_holder_res

    liquidation_res = asic_req.make_request(liquidation_url, data=share_holder_data).content
    if '访问异常' in liquidation_res:
        logger.error("在获取清算信息时,网站访问异常,重复访问！")
        return ''
    asic_res_html += liquidation_res

    person_res = asic_req.make_request(person_url, data=share_holder_data).content
    if '访问异常' in person_res:
        logger.error("在获取主要人员信息时,网站访问异常,重复访问！")
        return ''
    asic_res_html += person_res

    check_message_res = asic_req.make_request(checkMessage_url, data=share_holder_data).content
    if '访问异常' in check_message_res:
        logger.error("在获取抽查检查信息时,网站访问异常,重复访问！")
        return ''
    asic_res_html += check_message_res

    abnormal_operation_data = {
        'entId': com_temp[1],
        'clear': 'true',
        'timeStamp': str(int(round(time.time() * 1000))),
    }
    abnormal_operation_res = asic_req.make_request(abnormal_operation_url, data=abnormal_operation_data).content
    if '访问异常' in abnormal_operation_res:
        # BUGFIX: the log message previously said "抽查检查" (spot check),
        # copy-pasted from the branch above; this branch is abnormal operations.
        logger.error("在获取经营异常信息时,网站访问异常,重复访问！")
        return ''
    asic_res_html += abnormal_operation_res
    return asic_res_html


def base_info_run(companyName, MAXTIME=40):
    """
    Driver for downloading a company's base information.

    Retries get_base_info whenever it signals a transient failure by
    returning '' (wrong captcha answer, captcha-service hiccup, remote
    "访问异常"), up to MAXTIME attempts.

    :param companyName: company name or registration number.
    :param MAXTIME: maximum number of attempts before giving up.
    :return: None if the company does not exist;
             the HTML source (str) if it does.
    :raises Exception: when all MAXTIME attempts failed, or when
             get_base_info raised (re-raised with original traceback).
    """
    res = ''
    a_time = MAXTIME
    while a_time > 0:
        if res is None:
            return None           # company confirmed absent — stop retrying
        elif res == '':
            if a_time < MAXTIME:  # not the first attempt -> this is a retry
                logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, a_time))
            a_time -= 1
            try:
                res = get_base_info(companyName)
            except Exception:
                import traceback
                # BUGFIX: was traceback.print_exc(e) — print_exc's first
                # argument is a `limit`, not an exception object.
                traceback.print_exc()
                raise  # bare raise keeps the original traceback
        else:
            return res
    # Loop can only fall through with a_time exhausted and res still ''.
    if a_time <= 1 and res == '':
        raise Exception("多次破解验证码错误,当前设置次数为：%s" % MAXTIME)

if __name__ == "__main__":
    # companyName = '北京百度糯米信息技术有限公司'
    companyName = '北京天下神威科技有限公司'
    asic_res_html = base_info_run(companyName)
    print asic_res_html
    # import pdb
    # pdb.set_trace()
